##Introduction
This Rmarkdown file has a series of simulations run on the models
offered in idealstan. It simulates a large amount of data
and then checks to see if the residuals relative to the true parameters
sum to zero, as well as for credible interval coverage. In general, the
credible intervals aren’t going to be exactly where they should be
because the constraints used in estimating the models and identifying
the signs aren’t themselves included in the simulated data. I.e., one of
the ideal points actually follows a positive continuous distribution
instead of a random normal distribution once it is prefix, which
introduces a slight bias in the recovery of the true parameters. This is
an artifact of virtually every latent variable model, and is not of
great concern as long as the credible intervals reach close to 90
percent coverage.
First, the standard IRT 2-PL (for this one, the absence discriminations are not a part of the model):
bin_irt_2pl_sim <- id_sim_gen(num_person=100,num_items=20,ordinal=F,inflate=F)
bin_irt_2pl_est <- id_estimate(idealdata=bin_irt_2pl_sim,
model_type=1,init_pathfinder = TRUE,
restrict_ind_high = as.character(sort(bin_irt_2pl_sim@simul_data$true_person,
decreasing=T,
index=T)$ix[1]),
restrict_ind_low = as.character(sort(bin_irt_2pl_sim@simul_data$true_person,
decreasing=F,
index=T)$ix[1]),
fix_high = sort(bin_irt_2pl_sim@simul_data$true_person,
decreasing=T)[1],fix_low = sort(bin_irt_2pl_sim@simul_data$true_person,
decreasing=F)[1],
fixtype='prefix',
person_sd = 3,
nchains=2,ncores=4,niters = 500,warmup=500)
## [1] "Running pathfinder to find starting values"
## Finished in 0.6 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 2 parallel chains, with 2 thread(s) per chain...
##
## Chain 2 finished in 14.9 seconds.
## Chain 1 finished in 15.0 seconds.
##
## Both chains finished successfully.
## Mean chain execution time: 15.0 seconds.
## Total execution time: 15.2 seconds.
id_plot_rhats(bin_irt_2pl_est)
id_sim_coverage(bin_irt_2pl_est) %>%
bind_rows(.id='Parameter') %>%
ggplot(aes(y=avg,x=Parameter)) +
stat_summary(fun.args=list(mult=1.96)) +
theme_minimal()
Plot true versus observed values:
id_plot_legis(bin_irt_2pl_est,show_true = T)
id_plot_sims(bin_irt_2pl_est,type='Residuals')
id_plot_sims(bin_irt_2pl_est)
new_data <- bin_irt_2pl_est@score_data@func_args$score_data
c1 <- id_post_pred(bin_irt_2pl_est, newdata=new_data)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 1000 samples."
## [1] "Now on model 1"
id_plot_ppc(bin_irt_2pl_est,ppc_pred=c1,combine_item=FALSE,
prompt_plot=FALSE)
## [[1]]
## [[1]][[1]]
## NULL
##
## [[1]][[2]]
## NULL
##
## [[1]][[3]]
## NULL
##
## [[1]][[4]]
## NULL
##
## [[1]][[5]]
## NULL
##
## [[1]][[6]]
## NULL
##
## [[1]][[7]]
## NULL
##
## [[1]][[8]]
## NULL
##
## [[1]][[9]]
## NULL
##
## [[1]][[10]]
## NULL
##
## [[1]][[11]]
## NULL
##
## [[1]][[12]]
## NULL
##
## [[1]][[13]]
## NULL
##
## [[1]][[14]]
## NULL
##
## [[1]][[15]]
## NULL
##
## [[1]][[16]]
## NULL
##
## [[1]][[17]]
## NULL
##
## [[1]][[18]]
## NULL
##
## [[1]][[19]]
## NULL
##
## [[1]][[20]]
## NULL
Try this same model but add an external covariate with an ideal point effect of +2
bin_irt_2pl_cov_sim <- id_sim_gen(num_person=100,num_items=20,ordinal=F,inflate=F,
cov_effect = 3)
bin_irt_2pl_cov_est <- id_estimate(idealdata=bin_irt_2pl_cov_sim,
model_type=1,init_pathfinder = TRUE,
restrict_ind_high = as.character(sort(bin_irt_2pl_cov_sim@simul_data$true_person,
decreasing=T,
index=T)$ix[1]),
restrict_ind_low = as.character(sort(bin_irt_2pl_cov_sim@simul_data$true_person,
decreasing=F,
index=T)$ix[1]),
fix_high = sort(bin_irt_2pl_cov_sim@simul_data$true_person,
decreasing=T)[1],fix_low = sort(bin_irt_2pl_cov_sim@simul_data$true_person,
decreasing=F)[1],
fixtype='prefix',
person_sd = 3,
nchains=2,ncores=4,niters = 500,warmup=500)
## [1] "Running pathfinder to find starting values"
## Finished in 0.7 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 2 parallel chains, with 2 thread(s) per chain...
##
## Chain 1 finished in 16.3 seconds.
## Chain 2 finished in 17.1 seconds.
##
## Both chains finished successfully.
## Mean chain execution time: 16.7 seconds.
## Total execution time: 17.1 seconds.
id_plot_rhats(bin_irt_2pl_cov_est)
id_sim_coverage(bin_irt_2pl_cov_est) %>%
bind_rows(.id='Parameter') %>%
ggplot(aes(y=avg,x=Parameter)) +
stat_summary(fun.args=list(mult=1.96)) +
theme_minimal()
Plot true versus observed values:
id_plot_legis(bin_irt_2pl_cov_est,show_true = T)
## [1] "Adding in hierarchical covariates values to the time-varying person scores."
## [1] "Collapsing covariates to person and time IDs."
## [1] "Done!"
id_plot_sims(bin_irt_2pl_cov_est,type='Residuals')
id_plot_sims(bin_irt_2pl_cov_est)
new_data <- bin_irt_2pl_cov_est@score_data@func_args$score_data
c1 <- id_post_pred(bin_irt_2pl_cov_est, newdata=new_data)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 1000 samples."
## [1] "Adding in hierarchical covariates values to the time-varying person scores."
## [1] "Collapsing covariates to person and time IDs."
## [1] "Done!"
## [1] "Now on model 1"
id_plot_ppc(bin_irt_2pl_cov_est,ppc_pred=c1,combine_item=FALSE,
prompt_plot=FALSE)
## [[1]]
## [[1]][[1]]
## NULL
##
## [[1]][[2]]
## NULL
##
## [[1]][[3]]
## NULL
##
## [[1]][[4]]
## NULL
##
## [[1]][[5]]
## NULL
##
## [[1]][[6]]
## NULL
##
## [[1]][[7]]
## NULL
##
## [[1]][[8]]
## NULL
##
## [[1]][[9]]
## NULL
##
## [[1]][[10]]
## NULL
##
## [[1]][[11]]
## NULL
##
## [[1]][[12]]
## NULL
##
## [[1]][[13]]
## NULL
##
## [[1]][[14]]
## NULL
##
## [[1]][[15]]
## NULL
##
## [[1]][[16]]
## NULL
##
## [[1]][[17]]
## NULL
##
## [[1]][[18]]
## NULL
##
## [[1]][[19]]
## NULL
##
## [[1]][[20]]
## NULL
First, plot estimated coefficient legis_x against true
value:
mcmc_recover_hist(bin_irt_2pl_cov_est@stan_samples$draws("legis_x"),
true=bin_irt_2pl_cov_sim@simul_data$cov_effect)
eps <- 1e-4
new_data1 <- mutate(bin_irt_2pl_cov_sim@score_matrix,
person_x = person_x - eps / 2)
new_data2 <- mutate(bin_irt_2pl_cov_sim@score_matrix,
person_x = person_x + eps / 2)
# now predict new outcome distributions given these slightly different datasets
cov_mod_pred1 <- id_post_pred(bin_irt_2pl_cov_est,newdata=new_data1,
use_cores=4,draws="all",
item_subset=levels(new_data1$item),
type="epred")
## [1] "Processing posterior replications for 2000 scores using all posterior samples out of a total of 1000 samples."
## [1] "Adding in hierarchical covariates values to the time-varying person scores."
## [1] "Collapsing covariates to person and time IDs."
## [1] "Done!"
## [1] "Now on model 1"
cov_mod_pred2 <- id_post_pred(bin_irt_2pl_cov_est,newdata=new_data2,
use_cores=4,draws="all",
item_subset=levels(new_data2$item),
type="epred")
## [1] "Processing posterior replications for 2000 scores using all posterior samples out of a total of 1000 samples."
## [1] "Adding in hierarchical covariates values to the time-varying person scores."
## [1] "Collapsing covariates to person and time IDs."
## [1] "Done!"
## [1] "Now on model 1"
# now we need to difference the datasets at the item level to calculate the effects
# this will create some lists that will then be appended together to a big data frame
# with one observation per posterior draw
print("Looping over items")
## [1] "Looping over items"
# difference the predictions
c1 <- purrr::map2(cov_mod_pred1[[1]],
cov_mod_pred2[[1]],
function(small,big) {
# difference the effects
(big - small) / eps
})
# combine into datasets for each item with item id + person (senator) id
c2 <- lapply(c1, function(mat) {
out_data <- attr(mat, "data")
colnames(mat) <- out_data$person_id
as_tibble(mat) %>%
mutate(draws=1:n(),
item_id=unique(out_data$item_id)) %>%
gather(key="person_id",value="estimate",-draws,-item_id) %>%
mutate(person_id=as.numeric(person_id),
estimate=as.numeric(estimate))
}) %>% bind_rows
# merge in some original data
to_merge <- mutate(bin_irt_2pl_cov_est@score_data@score_matrix,
item_orig=item_id,
person_orig=person_id,
person_id=as.numeric(person_id),
item_id=as.numeric(item_id)) %>%
select(person_id, item_id, group_id,item_orig, person_orig) %>%
distinct
# add in party data so we can calculate party-specific effects
c2 <- left_join(c2, to_merge,
by=c("item_id","person_id"))
# get effect separately by democrats/republicans
by_party <- group_by(c2, draws, group_id, item_id, item_orig) %>%
summarize(mean_est1=mean(estimate)) %>%
group_by(group_id, item_id, item_orig) %>%
summarize(mean_est=mean(mean_est1),
low_est=quantile(mean_est1, .05),
high_est=quantile(mean_est1, .95))
# merge in item discrimination to add some color / show high-discrim votes
item_discrim <- filter(bin_irt_2pl_cov_est@summary,
grepl(x=variable, pattern="sigma\\_reg\\_free")) %>%
mutate(item_id=as.numeric(stringr::str_extract(variable, "[0-9]+")))
by_party <- left_join(by_party,
select(item_discrim, median, item_id))
# plot the result
by_party %>%
ggplot(aes(y=mean_est,
x=reorder(item_id,mean_est))) +
geom_linerange(aes(ymin=low_est,
ymax=high_est,
colour=`median`)) +
ggthemes::theme_tufte() +
scale_colour_viridis_c(name="Discrimination") +
coord_flip() +
geom_hline(yintercept=0,linetype=2) +
ggthemes::theme_clean() +
theme(axis.text.y=element_blank(),
axis.ticks.y=element_blank()) +
theme(legend.position = "bottom")
Next, inflated 2-PL IRT (binary):
bin_irt_2pl_abs_sim <- id_sim_gen(num_person=20,num_items=100,ordinal=F,inflate=T,
absence_diff_mean=0)
bin_irt_2pl_abs_est <- id_estimate(idealdata=bin_irt_2pl_abs_sim,
model_type=2,
restrict_ind_high = as.character(sort(bin_irt_2pl_abs_sim@simul_data$true_person,
decreasing=T,
index=T)$ix[1]),
restrict_ind_low = as.character(sort(bin_irt_2pl_abs_sim@simul_data$true_person,
decreasing=F,
index=T)$ix[1]),
fix_high=sort(bin_irt_2pl_abs_sim@simul_data$true_person,
decreasing=T)[1],
fix_low=sort(bin_irt_2pl_abs_sim@simul_data$true_person,
decreasing=F)[1],
fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in 0.9 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 4 parallel chains, with 1 thread(s) per chain...
##
## Chain 3 finished in 53.0 seconds.
## Chain 4 finished in 53.2 seconds.
## Chain 2 finished in 53.6 seconds.
## Chain 1 finished in 59.4 seconds.
##
## All 4 chains finished successfully.
## Mean chain execution time: 54.8 seconds.
## Total execution time: 59.5 seconds.
id_plot_rhats(bin_irt_2pl_abs_est)
Plot true versus observed values:
id_plot_legis(bin_irt_2pl_abs_est,show_true = T)
id_sim_coverage(bin_irt_2pl_abs_est) %>%
bind_rows(.id='Parameter') %>%
ggplot(aes(y=avg,x=Parameter)) +
stat_summary(fun.args=list(mult=1.96)) +
theme_minimal()
id_plot_sims(bin_irt_2pl_abs_est,type='Residuals')
id_plot_sims(bin_irt_2pl_abs_est)
bin_irt_2pl_abs_est %>%
id_post_pred %>%
id_plot_ppc(bin_irt_2pl_abs_est,ppc_pred=.,combine_item=FALSE,prompt_plot=FALSE)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 4000 samples."
## [1] "Now on model 2"
## [[1]]
## [[1]][[1]]
## NULL
##
## [[1]][[2]]
## NULL
##
## [[1]][[3]]
## NULL
##
## [[1]][[4]]
## NULL
##
## [[1]][[5]]
## NULL
##
## [[1]][[6]]
## NULL
##
## [[1]][[7]]
## NULL
##
## [[1]][[8]]
## NULL
##
## [[1]][[9]]
## NULL
##
## [[1]][[10]]
## NULL
##
## [[1]][[11]]
## NULL
##
## [[1]][[12]]
## NULL
##
## [[1]][[13]]
## NULL
##
## [[1]][[14]]
## NULL
##
## [[1]][[15]]
## NULL
##
## [[1]][[16]]
## NULL
##
## [[1]][[17]]
## NULL
##
## [[1]][[18]]
## NULL
##
## [[1]][[19]]
## NULL
##
## [[1]][[20]]
## NULL
##
## [[1]][[21]]
## NULL
##
## [[1]][[22]]
## NULL
##
## [[1]][[23]]
## NULL
##
## [[1]][[24]]
## NULL
##
## [[1]][[25]]
## NULL
##
## [[1]][[26]]
## NULL
##
## [[1]][[27]]
## NULL
##
## [[1]][[28]]
## NULL
##
## [[1]][[29]]
## NULL
##
## [[1]][[30]]
## NULL
##
## [[1]][[31]]
## NULL
##
## [[1]][[32]]
## NULL
##
## [[1]][[33]]
## NULL
##
## [[1]][[34]]
## NULL
##
## [[1]][[35]]
## NULL
##
## [[1]][[36]]
## NULL
##
## [[1]][[37]]
## NULL
##
## [[1]][[38]]
## NULL
##
## [[1]][[39]]
## NULL
##
## [[1]][[40]]
## NULL
##
## [[1]][[41]]
## NULL
##
## [[1]][[42]]
## NULL
##
## [[1]][[43]]
## NULL
##
## [[1]][[44]]
## NULL
##
## [[1]][[45]]
## NULL
##
## [[1]][[46]]
## NULL
##
## [[1]][[47]]
## NULL
##
## [[1]][[48]]
## NULL
##
## [[1]][[49]]
## NULL
##
## [[1]][[50]]
## NULL
##
## [[1]][[51]]
## NULL
##
## [[1]][[52]]
## NULL
##
## [[1]][[53]]
## NULL
##
## [[1]][[54]]
## NULL
##
## [[1]][[55]]
## NULL
##
## [[1]][[56]]
## NULL
##
## [[1]][[57]]
## NULL
##
## [[1]][[58]]
## NULL
##
## [[1]][[59]]
## NULL
##
## [[1]][[60]]
## NULL
##
## [[1]][[61]]
## NULL
##
## [[1]][[62]]
## NULL
##
## [[1]][[63]]
## NULL
##
## [[1]][[64]]
## NULL
##
## [[1]][[65]]
## NULL
##
## [[1]][[66]]
## NULL
##
## [[1]][[67]]
## NULL
##
## [[1]][[68]]
## NULL
##
## [[1]][[69]]
## NULL
##
## [[1]][[70]]
## NULL
##
## [[1]][[71]]
## NULL
##
## [[1]][[72]]
## NULL
##
## [[1]][[73]]
## NULL
##
## [[1]][[74]]
## NULL
##
## [[1]][[75]]
## NULL
##
## [[1]][[76]]
## NULL
##
## [[1]][[77]]
## NULL
##
## [[1]][[78]]
## NULL
##
## [[1]][[79]]
## NULL
##
## [[1]][[80]]
## NULL
##
## [[1]][[81]]
## NULL
##
## [[1]][[82]]
## NULL
##
## [[1]][[83]]
## NULL
##
## [[1]][[84]]
## NULL
##
## [[1]][[85]]
## NULL
##
## [[1]][[86]]
## NULL
##
## [[1]][[87]]
## NULL
##
## [[1]][[88]]
## NULL
##
## [[1]][[89]]
## NULL
##
## [[1]][[90]]
## NULL
##
## [[1]][[91]]
## NULL
##
## [[1]][[92]]
## NULL
##
## [[1]][[93]]
## NULL
##
## [[1]][[94]]
## NULL
##
## [[1]][[95]]
## NULL
##
## [[1]][[96]]
## NULL
##
## [[1]][[97]]
## NULL
##
## [[1]][[98]]
## NULL
##
## [[1]][[99]]
## NULL
##
## [[1]][[100]]
## NULL
Now we’ll start with the ordinal models. First the uninflated ordinal model:
ord_irt_sim <- id_sim_gen(num_person=20,num_items=100,inflate=F,
absence_diff_mean=0,model_type='ordinal_ratingscale')
ord_irt_est <- id_estimate(idealdata=ord_irt_sim,
model_type=3,
restrict_ind_high = as.character(sort(ord_irt_sim@simul_data$true_person,decreasing=T,
index=T)$ix[1]),
restrict_ind_low = as.character(sort(ord_irt_sim@simul_data$true_person,decreasing=F,
index=T)$ix[1]),
fix_high=sort(ord_irt_sim@simul_data$true_person,decreasing=T)[1],
fix_low=sort(ord_irt_sim@simul_data$true_person,decreasing=F)[1],
fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in 1.2 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 4 parallel chains, with 1 thread(s) per chain...
## Chain 1 finished in 135.5 seconds.
## Chain 4 finished in 161.1 seconds.
## Chain 2 finished in 164.9 seconds.
## Chain 3 finished in 167.0 seconds.
##
## All 4 chains finished successfully.
## Mean chain execution time: 157.1 seconds.
## Total execution time: 167.2 seconds.
id_plot_rhats(ord_irt_est)
Plot true versus observed values:
id_plot_legis(ord_irt_est,show_true = T)
id_sim_coverage(ord_irt_est) %>%
bind_rows(.id='Parameter') %>%
ggplot(aes(y=avg,x=Parameter)) +
stat_summary(fun.args=list(mult=1.96)) +
theme_minimal()
id_plot_sims(ord_irt_est,type='Residuals')
id_plot_sims(ord_irt_est)
ord_irt_est %>%
id_post_pred %>%
id_plot_ppc(ord_irt_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 4000 samples."
## [1] "Now on model 3"
## [1] "Predicting outcomes for model ID 3."
And we will finish with the inflated ordinal model:
ord_irt_abs_sim <- id_sim_gen(num_person=20,num_items=100,inflate=T,
absence_diff_mean=0,model_type='ordinal_ratingscale')
ord_irt_abs_est <- id_estimate(idealdata=ord_irt_abs_sim,
model_type=4,
restrict_ind_high = as.character(sort(ord_irt_abs_sim@simul_data$true_person,
decreasing=T,
index=T)$ix[1]),
restrict_ind_low = as.character(sort(ord_irt_abs_sim@simul_data$true_person,
decreasing=F,
index=T)$ix[1]),
fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in 1.5 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 4 parallel chains, with 1 thread(s) per chain...
## Chain 4 finished in 86.1 seconds.
## Chain 2 finished in 89.4 seconds.
## Chain 3 finished in 91.0 seconds.
## Chain 1 finished in 91.2 seconds.
##
## All 4 chains finished successfully.
## Mean chain execution time: 89.4 seconds.
## Total execution time: 91.3 seconds.
id_plot_rhats(ord_irt_abs_est)
id_plot_legis(ord_irt_abs_est,show_true = T)
id_sim_coverage(ord_irt_abs_est) %>%
bind_rows(.id='Parameter') %>%
ggplot(aes(y=avg,x=Parameter)) +
stat_summary(fun.args=list(mult=1.96)) +
theme_minimal()
id_plot_sims(ord_irt_abs_est,type='Residuals')
id_plot_sims(ord_irt_abs_est)
ord_irt_abs_est %>%
id_post_pred %>%
id_plot_ppc(ord_irt_abs_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 4000 samples."
## [1] "Now on model 4"
## [1] "Predicting outcomes for model ID 4."
We can now try the ordinal graded response version (non-inflated):
ord_irt_grm_sim <- id_sim_gen(num_person=20,num_items=100,inflate=F,
absence_diff_mean=0,model_type='ordinal_grm')
ord_irt_grm_est <- id_estimate(idealdata=ord_irt_grm_sim,
model_type=5,
restrict_ind_high = as.character(sort(ord_irt_grm_sim@simul_data$true_person,
decreasing=T,
index=T)$ix[1]),
restrict_ind_low = as.character(sort(ord_irt_grm_sim@simul_data$true_person,
decreasing=F,
index=T)$ix[1]),
fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in 1.1 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 4 parallel chains, with 1 thread(s) per chain...
## Chain 3 finished in 292.4 seconds.
## Chain 2 finished in 294.7 seconds.
## Chain 1 finished in 295.3 seconds.
## Chain 4 finished in 296.0 seconds.
##
## All 4 chains finished successfully.
## Mean chain execution time: 294.6 seconds.
## Total execution time: 296.2 seconds.
id_plot_rhats(ord_irt_grm_est)
id_sim_coverage(ord_irt_grm_est) %>%
bind_rows(.id='Parameter') %>%
ggplot(aes(y=avg,x=Parameter)) +
stat_summary(fun.args=list(mult=1.96)) +
theme_minimal()
id_plot_legis(ord_irt_grm_est,show_true = T)
id_plot_sims(ord_irt_grm_est,type='Residuals')
id_plot_sims(ord_irt_grm_est)
ord_irt_grm_est %>%
id_post_pred %>%
id_plot_ppc(ord_irt_grm_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 4000 samples."
## [1] "Now on model 5"
## [1] "Predicting outcomes for model ID 5."
And now the inflated GRM:
ord_irt_grm_abs_sim <- id_sim_gen(num_person=20,num_items=100,inflate=T,
absence_diff_mean=0,model_type='ordinal_grm')
ord_irt_grm_abs_est <- id_estimate(idealdata=ord_irt_grm_abs_sim,
model_type=6,
restrict_ind_high = as.character(sort(ord_irt_grm_abs_sim@simul_data$true_person,
decreasing=T,
index=T)$ix[1]),
restrict_ind_low = as.character(sort(ord_irt_grm_abs_sim@simul_data$true_person,
decreasing=F,
index=T)$ix[1]),
fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in 1.4 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 4 parallel chains, with 1 thread(s) per chain...
## Chain 1 finished in 166.7 seconds.
## Chain 4 finished in 179.4 seconds.
## Chain 2 finished in 187.9 seconds.
## Chain 3 finished in 195.5 seconds.
##
## All 4 chains finished successfully.
## Mean chain execution time: 182.4 seconds.
## Total execution time: 195.7 seconds.
id_plot_rhats(ord_irt_grm_abs_est)
id_plot_legis(ord_irt_grm_abs_est,show_true = T)
id_sim_coverage(ord_irt_grm_abs_est) %>%
bind_rows(.id='Parameter') %>%
ggplot(aes(y=avg,x=Parameter)) +
stat_summary(fun.args=list(mult=1.96)) +
theme_minimal()
id_plot_sims(ord_irt_grm_abs_est,type='Residuals')
id_plot_sims(ord_irt_grm_abs_est)
ord_irt_grm_abs_est %>%
id_post_pred %>%
id_plot_ppc(ord_irt_grm_abs_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 4000 samples."
## [1] "Now on model 6"
## [1] "Predicting outcomes for model ID 6."
Models I am adding to this release include the Poisson model:
ord_irt_poisson_sim <- id_sim_gen(num_person=20,num_items=100,inflate=F,
absence_diff_mean=0,model_type='poisson')
ord_irt_poisson_est <- id_estimate(idealdata=ord_irt_poisson_sim,
model_type=7,restrict_ind_high = as.character(sort(ord_irt_poisson_sim@simul_data$true_person,
decreasing=T,
index=T)$ix[1]),
restrict_ind_low = as.character(sort(ord_irt_poisson_sim@simul_data$true_person,
decreasing=F,
index=T)$ix[1]),
fix_high=sort(ord_irt_poisson_sim@simul_data$true_person,
decreasing=T)[1],
fix_low=sort(ord_irt_poisson_sim@simul_data$true_person,
decreasing=F)[1],
fixtype='prefix',nchains=2,ncores=8)
## [1] "Running pathfinder to find starting values"
## Finished in 4.1 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 2 parallel chains, with 4 thread(s) per chain...
##
## Chain 2 finished in 73.2 seconds.
## Chain 1 finished in 75.5 seconds.
##
## Both chains finished successfully.
## Mean chain execution time: 74.4 seconds.
## Total execution time: 75.7 seconds.
id_plot_rhats(ord_irt_poisson_est)
id_plot_legis(ord_irt_poisson_est,show_true = T)
id_sim_coverage(ord_irt_poisson_est) %>%
bind_rows(.id='Parameter') %>%
ggplot(aes(y=avg,x=Parameter)) +
stat_summary(fun.args=list(mult=1.96)) +
theme_minimal()
id_plot_sims(ord_irt_poisson_est,type='Residuals')
id_plot_sims(ord_irt_poisson_est)
ord_irt_poisson_est %>%
id_post_pred %>%
id_plot_ppc(ord_irt_poisson_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 2000 samples."
## [1] "Now on model 7"
## [1] "Predicting outcomes for model ID 7."
Now Poisson-inflated:
ord_irt_poisson_abs_sim <- id_sim_gen(num_person=20,num_items=100,inflate=T,
absence_diff_mean=0,model_type='poisson')
ord_irt_poisson_abs_est <- id_estimate(idealdata=ord_irt_poisson_abs_sim,
model_type=8,ncores = 8,
nchains=2,
restrict_ind_high = as.character(sort(ord_irt_poisson_abs_sim@simul_data$true_person,
decreasing=T,
index=T)$ix[1]),
restrict_ind_low = as.character(sort(ord_irt_poisson_abs_sim@simul_data$true_person,
decreasing=F,
index=T)$ix[1]),
fix_high = sort(ord_irt_poisson_abs_sim@simul_data$true_person,
decreasing=T)[1],fix_low = sort(ord_irt_poisson_abs_sim@simul_data$true_person,
decreasing=F)[1],
fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in 6.1 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 2 parallel chains, with 4 thread(s) per chain...
##
## Chain 1 finished in 174.1 seconds.
## Chain 2 finished in 178.8 seconds.
##
## Both chains finished successfully.
## Mean chain execution time: 176.4 seconds.
## Total execution time: 178.9 seconds.
id_plot_rhats(ord_irt_poisson_abs_est)
id_plot_legis(ord_irt_poisson_abs_est,show_true = T)
id_sim_coverage(ord_irt_poisson_abs_est) %>%
bind_rows(.id='Parameter') %>%
ggplot(aes(y=avg,x=Parameter)) +
stat_summary(fun.args=list(mult=1.96)) +
theme_minimal()
id_plot_sims(ord_irt_poisson_abs_est,type='Residuals')
id_plot_sims(ord_irt_poisson_abs_est)
ord_irt_poisson_abs_est %>%
id_post_pred %>%
id_plot_ppc(ord_irt_poisson_abs_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 2000 samples."
## [1] "Now on model 8"
## [1] "Predicting outcomes for model ID 8."
ord_irt_poisson_abs_est %>%
id_post_pred(output='missing') %>%
id_plot_ppc(ord_irt_poisson_abs_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 2000 samples."
## [1] "Now on model 8"
## [1] "Predicting outcomes for model ID 8."
A Normally-distributed outcome with no inflation:
ord_irt_normal_sim <- id_sim_gen(num_person=20,num_items=100,ordinal=F,inflate=F,
absence_diff_mean=0,model_type='normal')
ord_irt_normal_est <- id_estimate(idealdata=ord_irt_normal_sim,
model_type=9,restrict_sd_high = .1,
restrict_sd_low=.1,
restrict_ind_high = as.character(sort(ord_irt_normal_sim@simul_data$true_person,
decreasing=T,
index=T)$ix[1]),
restrict_ind_low = as.character(sort(ord_irt_normal_sim@simul_data$true_person,
decreasing=F,
index=T)$ix[1]),fix_high = sort(ord_irt_normal_sim@simul_data$true_person,
decreasing=T)[1],fix_low = sort(ord_irt_normal_sim@simul_data$true_person,
decreasing=F)[1],
fixtype='prefix',compile_optim=F,
het_var=FALSE)
## [1] "Running pathfinder to find starting values"
## Finished in 0.6 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 4 parallel chains, with 1 thread(s) per chain...
## Chain 3 finished in 53.3 seconds.
## Chain 2 finished in 54.2 seconds.
## Chain 1 finished in 55.3 seconds.
## Chain 4 finished in 55.5 seconds.
##
## All 4 chains finished successfully.
## Mean chain execution time: 54.6 seconds.
## Total execution time: 55.6 seconds.
id_plot_rhats(ord_irt_normal_est)
id_plot_legis(ord_irt_normal_est,show_true = T)
id_sim_coverage(ord_irt_normal_est) %>%
bind_rows(.id='Parameter') %>%
ggplot(aes(y=avg,x=Parameter)) +
stat_summary(fun.args=list(mult=1.96)) +
theme_minimal()
id_plot_sims(ord_irt_normal_est,type='Residuals')
id_plot_sims(ord_irt_normal_est)
ord_irt_normal_est %>%
id_post_pred %>%
id_plot_ppc(ord_irt_normal_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 4000 samples."
## [1] "Now on model 9"
## [1] "Predicting outcomes for model ID 9."
A Normally-distributed outcome with inflation:
ord_irt_normal_abs_sim <- id_sim_gen(num_person=20,num_items=100,inflate=T,
absence_diff_mean=0,model_type='normal')
ord_irt_normal_abs_est <- id_estimate(idealdata=ord_irt_normal_abs_sim,
model_type=10,
restrict_ind_high = as.character(sort(ord_irt_normal_abs_sim@simul_data$true_person,
decreasing=T,
index=T)$ix[1]),
restrict_ind_low = as.character(sort(ord_irt_normal_abs_sim@simul_data$true_person,
decreasing=F,
index=T)$ix[1]),
fix_high = sort(ord_irt_normal_abs_sim@simul_data$true_person,
decreasing=T)[1],fix_low = sort(ord_irt_normal_abs_sim@simul_data$true_person,
decreasing=F)[1],nchains = 2,ncores=4,
fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in 6.1 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 2 parallel chains, with 2 thread(s) per chain...
## Chain 1 finished in 157.1 seconds.
## Chain 2 finished in 247.3 seconds.
##
## Both chains finished successfully.
## Mean chain execution time: 202.2 seconds.
## Total execution time: 247.4 seconds.
id_plot_rhats(ord_irt_normal_abs_est)
id_plot_legis(ord_irt_normal_abs_est,show_true = T)
id_sim_coverage(ord_irt_normal_abs_est) %>%
bind_rows(.id='Parameter') %>%
ggplot(aes(y=avg,x=Parameter)) +
stat_summary(fun.args=list(mult=1.96)) +
theme_minimal()
id_plot_sims(ord_irt_normal_abs_est,type='Residuals')
id_plot_sims(ord_irt_normal_abs_est)
ord_irt_normal_abs_est %>%
id_post_pred %>%
id_plot_ppc(ord_irt_normal_abs_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 2000 samples."
## [1] "Now on model 10"
## [1] "Predicting outcomes for model ID 10."
ord_irt_normal_abs_est %>%
id_post_pred(output='missing') %>%
id_plot_ppc(ord_irt_normal_abs_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 2000 samples."
## [1] "Now on model 10"
## [1] "Predicting outcomes for model ID 10."
A Lognormal model with no inflation:
ord_irt_lognormal_sim <- id_sim_gen(num_person=20,num_items=100,ordinal=F,inflate=F,
absence_diff_mean=0,model_type='lognormal')
ord_irt_lognormal_est <- id_estimate(idealdata=ord_irt_lognormal_sim,
model_type=11,
restrict_ind_high = as.character(sort(ord_irt_lognormal_sim@simul_data$true_person,
decreasing=T,
index=T)$ix[1]),
restrict_ind_low = as.character(sort(ord_irt_lognormal_sim@simul_data$true_person,
decreasing=F,
index=T)$ix[1]),
fix_high = sort(ord_irt_lognormal_sim@simul_data$true_person,
decreasing=T)[1],fix_low = sort(ord_irt_lognormal_sim@simul_data$true_person,
decreasing=F)[1],nchains = 2,ncores=4,
fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in 0.7 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 2 parallel chains, with 2 thread(s) per chain...
##
## Chain 2 finished in 24.1 seconds.
## Chain 1 finished in 25.1 seconds.
##
## Both chains finished successfully.
## Mean chain execution time: 24.6 seconds.
## Total execution time: 25.2 seconds.
id_plot_rhats(ord_irt_lognormal_est)
id_plot_legis(ord_irt_lognormal_est,show_true = T)
id_sim_coverage(ord_irt_lognormal_est) %>%
bind_rows(.id='Parameter') %>%
ggplot(aes(y=avg,x=Parameter)) +
stat_summary(fun.args=list(mult=1.96)) +
theme_minimal()
id_plot_sims(ord_irt_lognormal_est,type='Residuals')
id_plot_sims(ord_irt_lognormal_est)
ord_irt_lognormal_est %>%
id_post_pred %>%
id_plot_ppc(ord_irt_lognormal_est,ppc_pred=.) +
scale_x_log10()
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 2000 samples."
## [1] "Now on model 11"
## [1] "Predicting outcomes for model ID 11."
And last but not least, a lognormal model with inflation:
ord_irt_lognormal_abs_sim <- id_sim_gen(num_person=20,num_items=100,ordinal=F,inflate=T,
absence_diff_mean=0,model_type='lognormal')
ord_irt_lognormal_abs_est <- id_estimate(idealdata=ord_irt_lognormal_abs_sim,
model_type=12,
restrict_ind_high = as.character(sort(ord_irt_lognormal_abs_sim@simul_data$true_person,
decreasing=T,
index=T)$ix[1]),
restrict_ind_low = as.character(sort(ord_irt_lognormal_abs_sim@simul_data$true_person,
decreasing=F,
index=T)$ix[1]),fix_high = sort(ord_irt_lognormal_abs_sim@simul_data$true_person,
decreasing=T)[1],fix_low = sort(ord_irt_lognormal_abs_sim@simul_data$true_person,
decreasing=F)[1],nchains = 2,ncores=4,
fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in 6.6 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 2 parallel chains, with 2 thread(s) per chain...
## Chain 2 finished in 166.2 seconds.
## Chain 1 finished in 170.8 seconds.
##
## Both chains finished successfully.
## Mean chain execution time: 168.5 seconds.
## Total execution time: 170.8 seconds.
id_plot_rhats(ord_irt_lognormal_abs_est)
id_plot_legis(ord_irt_lognormal_abs_est,show_true = T)
id_sim_coverage(ord_irt_lognormal_abs_est) %>%
bind_rows(.id='Parameter') %>%
ggplot(aes(y=avg,x=Parameter)) +
stat_summary(fun.args=list(mult=1.96)) +
theme_minimal()
id_plot_sims(ord_irt_lognormal_abs_est,type='Residuals')
id_plot_sims(ord_irt_lognormal_abs_est)
ord_irt_lognormal_abs_est %>%
id_post_pred %>%
id_plot_ppc(ord_irt_lognormal_abs_est,ppc_pred=.) +
scale_x_log10()
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 2000 samples."
## [1] "Now on model 12"
## [1] "Predicting outcomes for model ID 12."
ord_irt_lognormal_abs_est %>%
id_post_pred(output='missing') %>%
id_plot_ppc(ord_irt_lognormal_abs_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 2000 samples."
## [1] "Now on model 12"
## [1] "Predicting outcomes for model ID 12."
We won’t test all the possible time auto-correlation settings, only a subset of them. I use an inflated binary model as it is a very common one.
While these models are identified, it appears that sometimes given the amount of data, they are only weakly identified in the posterior and Rhat values higher than 1.1 are possible (but are not evidence of multi-modality).
First random walks:
bin_time_irt_2pl_abs_sim <- id_sim_gen(num_person=50,num_items=100,
ordinal=F,inflate=T,
absence_diff_mean=0,
time_process = 'random',
time_points=10)
# as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$true_reg_discrim,
#
# decreasing=TRUE,
# index=T)$ix[1])
# as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$true_reg_discrim,
# decreasing=F,
# index=T)$ix[1])
# 5*sort(bin_time_irt_2pl_abs_sim@simul_data$true_reg_discrim,
#
# decreasing=TRUE)[1]
best_first_time <- 4
best_second_time <- 500
bin_time_irt_2pl_abs_est <- id_estimate(idealdata=bin_time_irt_2pl_abs_sim,
model_type=2,nchains=2,ncores = 6,
id_refresh=100,time_var = 10,
niters = 500,warmup = 500,const_type = "items",
keep_param=list(person_vary=T,person_int=F,item=TRUE,
extra=T),
restrict_sd_high=10,
restrict_sd_low=10,
vary_ideal_pts = 'random_walk',
restrict_var = F,
restrict_ind_high = as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$true_reg_discrim,
decreasing=TRUE,
index=TRUE)$ix[1]),
restrict_ind_low = as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$true_reg_discrim,
decreasing=F,
index=T)$ix[1]),
fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Path [1] :Initial log joint density = -6381.800927
## Path [1] : Iter log prob ||dx|| ||grad|| alpha alpha0 # evals ELBO Best ELBO Notes
## 99 -3.373e+03 6.273e-03 3.098e-02 3.005e-01 1.000e+00 2476 -3.697e+03 -3.697e+03
## Path [1] : Iter log prob ||dx|| ||grad|| alpha alpha0 # evals ELBO Best ELBO Notes
## 106 -3.373e+03 1.193e-03 1.138e-02 1.000e+00 1.000e+00 2651 -3.552e+03 -3.552e+03
## Path [1] :Best Iter: [5] ELBO (-3510.375932) evaluations: (2651)
## Finished in 1.8 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 2 parallel chains, with 3 thread(s) per chain...
##
## Chain 1 Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 2 Iteration: 1 / 1000 [ 0%] (Warmup)
## Chain 1 Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 2 Iteration: 100 / 1000 [ 10%] (Warmup)
## Chain 1 Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 2 Iteration: 200 / 1000 [ 20%] (Warmup)
## Chain 1 Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 2 Iteration: 300 / 1000 [ 30%] (Warmup)
## Chain 1 Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 2 Iteration: 400 / 1000 [ 40%] (Warmup)
## Chain 1 Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 1 Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 2 Iteration: 500 / 1000 [ 50%] (Warmup)
## Chain 2 Iteration: 501 / 1000 [ 50%] (Sampling)
## Chain 1 Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 2 Iteration: 600 / 1000 [ 60%] (Sampling)
## Chain 1 Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 2 Iteration: 700 / 1000 [ 70%] (Sampling)
## Chain 1 Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 2 Iteration: 800 / 1000 [ 80%] (Sampling)
## Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 2 Iteration: 900 / 1000 [ 90%] (Sampling)
## Chain 1 Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 1 finished in 51.2 seconds.
## Chain 2 Iteration: 1000 / 1000 [100%] (Sampling)
## Chain 2 finished in 51.7 seconds.
##
## Both chains finished successfully.
## Mean chain execution time: 51.5 seconds.
## Total execution time: 51.8 seconds.
id_plot_rhats(bin_time_irt_2pl_abs_est) + theme(text=element_text(family=""))
id_sim_coverage(bin_time_irt_2pl_abs_est) %>%
bind_rows(.id='Parameter') %>%
ggplot(aes(y=avg,x=Parameter)) +
stat_summary(fun.args=list(mult=1.96)) +
theme_minimal()
Let’s do a time-varying ideal point plot with true point overlay:
id_plot_legis_dyn(bin_time_irt_2pl_abs_est,plot_sim = T) + facet_wrap(~person_id,scales="free_y")
id_plot_sims(bin_time_irt_2pl_abs_est,type='Residuals')
id_plot_sims(bin_time_irt_2pl_abs_est)
bin_time_spline_irt_2pl_abs_sim <- id_sim_gen(num_person=100,num_items=100,inflate=F,
absence_diff_mean=0,
time_process = 'random',
time_points=10,time_sd = .25)
sort_ids_high <- sort(bin_time_spline_irt_2pl_abs_sim@simul_data$true_reg_discrim,
decreasing=TRUE,
index=TRUE)$ix
high_restrict <- c(min(sort_ids_high[1:20]),
round(median(sort_ids_high[1:20])),
max(sort_ids_high[1:20]))
sort_ids_low <- sort(bin_time_spline_irt_2pl_abs_sim@simul_data$true_reg_discrim,,
decreasing=F,
index=T)$ix
low_restrict <- c(min(sort_ids_low[1:20]),
round(median(sort_ids_low[1:20])),
max(sort_ids_low[1:20]))
bin_time_spline_irt_2pl_abs_est <- id_estimate(idealdata=bin_time_spline_irt_2pl_abs_sim,
use_groups = F,
model_type=1,
time_center_cutoff = 50,
ncores = parallel::detectCores(),
vary_ideal_pts = 'splines',prior_only = FALSE,
spline_knots=NULL,spline_degree = 2,
const_type = "items",person_sd = 5,
diff_reg_sd = 5,
discrim_reg_scale = 2,discrim_reg_shape = 2,
time_var = .1,restrict_var = F,
restrict_ind_high = as.character(high_restrict),
restrict_ind_low =as.character(low_restrict),
fixtype='prefix',adapt_delta=0.95)
## [1] "Running pathfinder to find starting values"
## Finished in 2.7 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
##
## Chain 2 finished in 19012.6 seconds.
## Chain 3 finished in 33786.6 seconds.
## Chain 4 finished in 33796.8 seconds.
## Chain 1 finished in 33798.0 seconds.
##
## All 4 chains finished successfully.
## Mean chain execution time: 30098.5 seconds.
## Total execution time: 33798.1 seconds.
id_plot_rhats(bin_time_spline_irt_2pl_abs_est)
id_sim_coverage(bin_time_spline_irt_2pl_abs_est) %>%
bind_rows(.id='Parameter') %>%
ggplot(aes(y=avg,x=Parameter)) +
stat_summary(fun.args=list(mult=1.96)) +
theme_minimal()
Let’s do a time-varying ideal point plot with true point overlay:
id_plot_legis_dyn(bin_time_spline_irt_2pl_abs_est,plot_lines = 10) + guides(colour="none") + facet_wrap(~person_id,scales="free_y")
id_plot_sims(bin_time_spline_irt_2pl_abs_est,type='Residuals')
id_plot_sims(bin_time_spline_irt_2pl_abs_est)
And autocorrelation:
bin_time_ar_irt_2pl_abs_sim <- id_sim_gen(num_person=20,num_items=100,inflate=T,
absence_diff_mean=0,time_process = 'AR',time_points=20,time_sd = .1)
bin_time_ar_irt_2pl_abs_sim@score_matrix$group_id <- forcats::fct_collapse(bin_time_ar_irt_2pl_abs_sim@score_matrix$person_id,
`1`=c("1","2","3"))
bin_time_ar_irt_2pl_abs_est <- id_estimate(idealdata=bin_time_ar_irt_2pl_abs_sim,
model_type=2,
ncores = parallel::detectCores(),
vary_ideal_pts = 'AR1',
const_type = "items",
restrict_ind_high = as.character(sort(bin_time_ar_irt_2pl_abs_sim@simul_data$true_reg_discrim,
decreasing=TRUE,
index=TRUE)$ix[1]),
restrict_ind_low = as.character(sort(bin_time_ar_irt_2pl_abs_sim@simul_data$true_reg_discrim,,
decreasing=F,
index=T)$ix[1]),
fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in 4.0 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
##
## Chain 4 finished in 123.6 seconds.
## Chain 1 finished in 126.2 seconds.
## Chain 2 finished in 126.8 seconds.
## Chain 3 finished in 127.4 seconds.
##
## All 4 chains finished successfully.
## Mean chain execution time: 126.0 seconds.
## Total execution time: 127.6 seconds.
id_plot_rhats(bin_time_ar_irt_2pl_abs_est)
Let’s do a time-varying ideal point plot with true point overlay:
id_plot_legis_dyn(bin_time_ar_irt_2pl_abs_est,plot_lines = 10) + guides(colour="none") + facet_wrap(~person_id,scales="free_y")
The Gaussian process differs in that is a non-parametric approach to time-series modeling. It can fit very flexible curves and also works on irregularly-spaced time series.
time_gp_sim <- id_sim_gen(num_person=10,num_items=200,inflate=T,
absence_diff_mean=0,
time_process = 'GP',
time_points=20)
## Running MCMC with 1 chain...
##
## Chain 1 Iteration: 1 / 1 [100%] (Sampling)
## Chain 1 finished in 0.0 seconds.
time_gp_est <- id_estimate(idealdata=time_gp_sim,
model_type=2,
use_vb=F,const_type="items",
restrict_ind_high = as.character(sort(time_gp_sim@simul_data$true_reg_discrim,
decreasing=TRUE,
index=TRUE)$ix[1]),
restrict_ind_low = as.character(sort(time_gp_sim@simul_data$true_reg_discrim,,
decreasing=F,
index=T)$ix[1]),
nchains=2,ncores=8,warmup=1000,niters=500,
vary_ideal_pts = 'GP',
fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in 14.9 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 2 parallel chains, with 4 thread(s) per chain...
## Chain 2 finished in 331.6 seconds.
## Chain 1 finished in 336.5 seconds.
##
## Both chains finished successfully.
## Mean chain execution time: 334.1 seconds.
## Total execution time: 336.7 seconds.
id_plot_rhats(time_gp_est)
id_sim_coverage(time_gp_est) %>%
bind_rows(.id='Parameter') %>%
ggplot(aes(y=avg,x=Parameter)) +
stat_summary(fun.args=list(mult=1.96)) +
theme_minimal()
Let’s do a time-varying ideal point plot with true point overlay:
id_plot_legis_dyn(time_gp_est,plot_sim = T) +facet_wrap(~person_id)
id_plot_sims(time_gp_est,type='Residuals')
id_plot_sims(time_gp_est)
Another new addition is the latent space model, which is only a slight modification on the IRT model.
Non-inflated:
latent_noninfl_sim <- id_sim_gen(num_person=10,num_items=125,ordinal=F,
inflate=F,
model_type='binary',
latent_space=T)
latent_noninfl_est <- id_estimate(idealdata=latent_noninfl_sim,
model_type=13,
restrict_ind_high = as.character(sort(latent_noninfl_sim@simul_data$true_person,
decreasing=TRUE,
index=T)$ix[1]),
restrict_ind_low = as.character(sort(latent_noninfl_sim@simul_data$true_person,
decreasing=F,
index=T)$ix[1]),
fixtype='prefix',id_refresh=100,warmup=250,niters=250,nchains=2,ncores=2)
## [1] "Running pathfinder to find starting values"
## Path [1] :Initial log joint density = -1128.305893
## Path [1] : Iter log prob ||dx|| ||grad|| alpha alpha0 # evals ELBO Best ELBO Notes
## 47 -6.693e+02 1.053e-03 9.242e-01 1.000e-03 1.000e-03 1176 -1.929e+03 -1.929e+03LS failed, Hessian reset
## Path [1] : Iter log prob ||dx|| ||grad|| alpha alpha0 # evals ELBO Best ELBO Notes
## 99 -6.690e+02 2.558e-02 7.742e-01 1.891e+00 9.218e-01 2476 -1.487e+03 -1.487e+03
## Path [1] : Iter log prob ||dx|| ||grad|| alpha alpha0 # evals ELBO Best ELBO Notes
## 181 -6.690e+02 1.081e-06 5.788e-01 8.456e-07 1.000e-03 4526 -2.735e+03 -2.735e+03LS failed, Hessian reset
## Path [1] :Best Iter: [24] ELBO (-787.536684) evaluations: (4526)
## Finished in 1.3 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 2 parallel chains, with 1 thread(s) per chain...
##
## Chain 1 Iteration: 1 / 500 [ 0%] (Warmup)
## Chain 2 Iteration: 1 / 500 [ 0%] (Warmup)
## Chain 1 Iteration: 100 / 500 [ 20%] (Warmup)
## Chain 2 Iteration: 100 / 500 [ 20%] (Warmup)
## Chain 2 Iteration: 200 / 500 [ 40%] (Warmup)
## Chain 1 Iteration: 200 / 500 [ 40%] (Warmup)
## Chain 2 Iteration: 251 / 500 [ 50%] (Sampling)
## Chain 1 Iteration: 251 / 500 [ 50%] (Sampling)
## Chain 2 Iteration: 350 / 500 [ 70%] (Sampling)
## Chain 1 Iteration: 350 / 500 [ 70%] (Sampling)
## Chain 2 Iteration: 450 / 500 [ 90%] (Sampling)
## Chain 1 Iteration: 450 / 500 [ 90%] (Sampling)
## Chain 2 Iteration: 500 / 500 [100%] (Sampling)
## Chain 2 finished in 37.2 seconds.
## Chain 1 Iteration: 500 / 500 [100%] (Sampling)
## Chain 1 finished in 38.6 seconds.
##
## Both chains finished successfully.
## Mean chain execution time: 37.9 seconds.
## Total execution time: 38.6 seconds.
id_plot_rhats(latent_noninfl_est)
id_plot_legis(latent_noninfl_est,show_true = T)
id_sim_coverage(latent_noninfl_est) %>%
bind_rows(.id='Parameter') %>%
ggplot(aes(y=avg,x=Parameter)) +
stat_summary(fun.args=list(mult=1.96)) +
theme_minimal()
Inflated:
# you can't have too few people relative to bills
latent_infl_sim <- id_sim_gen(num_person=20,num_items=125,ordinal=F,
inflate=T,
model_type='binary',
latent_space=T)
latent_infl_est <- id_estimate(idealdata=latent_infl_sim,
model_type=14,
restrict_ind_high = as.character(sort(latent_infl_sim@simul_data$true_person,
decreasing=TRUE,
index=T)$ix[1]),
restrict_ind_low = as.character(sort(latent_infl_sim@simul_data$true_person,
decreasing=F,
index=T)$ix[1]),
fixtype='prefix',
adapt_delta=0.95,
diff_reg_sd = 2,
diff_miss_sd=2)
## [1] "Running pathfinder to find starting values"
## Finished in 1.5 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 4 parallel chains, with 1 thread(s) per chain...
##
## Chain 1 finished in 1110.1 seconds.
## Chain 4 finished in 1117.9 seconds.
## Chain 3 finished in 1196.5 seconds.
## Chain 2 finished in 1208.1 seconds.
##
## All 4 chains finished successfully.
## Mean chain execution time: 1158.2 seconds.
## Total execution time: 1208.2 seconds.
id_plot_rhats(latent_infl_est)
id_plot_legis(latent_infl_est,show_true = T)
id_sim_coverage(latent_infl_est) %>%
bind_rows(.id='Parameter') %>%
ggplot(aes(y=avg,x=Parameter)) +
stat_summary(fun.args=list(mult=1.96)) +
theme_minimal()